#######################################################
####                                               ####
####  Input: MP-by-debate data for analysis        ####
####  Output: Age robustness check                 ####
####                                               ####
#######################################################

# Load libraries

library(data.table) # v1.14.8
library(plyr) # v1.8.9
library(ggplot2) # v3.4.3
library(sandwich) # v3.0-2
library(dplyr) # v1.1.3
library(broom) # v1.0.5
library(plm) # v2.6-3
library(margins) # v0.3.26
library(estimatr) # v1.0.0
library(clubSandwich) # v0.5.10
library(stargazer) # v5.2.3

# Load data

load("working/speech_scores.Rdata")

# Gender gaps and age --------------------------------------------------------------------------------------------------------------

# Regression --------------------------------------------------------------------------------------------------------------

women_1 <-
  lm(women_dic_std ~ gender * age_years, data = speech_scores)

women_2 <-
  lm(
    women_dic_std ~ gender * age_years + party + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
women_3 <-
  lm(
    women_dic_std ~ gender * age_years + party   + women_minister_gov + gov_mp +
      women_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(women_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <- sqrt(diag(vcovCL(women_2, ~ person_id)))
my_clustered_errors[[3]] <- sqrt(diag(vcovCL(women_3, ~ person_id)))

save(women_1,
     women_2,
     women_3,
     my_clustered_errors,
     file = "analysis/models/women_age.Rdata")

health_1 <-
  lm(health_std ~ gender * age_years, data = speech_scores)
health_2 <-
  lm(
    health_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
health_3 <-
  lm(
    health_std ~ gender * age_years + party   + health_minister_gov + gov_mp +
      health_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(health_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <- sqrt(diag(vcovCL(health_2, ~ person_id)))
my_clustered_errors[[3]] <- sqrt(diag(vcovCL(health_3, ~ person_id)))

save(health_1,
     health_2,
     health_3,
     my_clustered_errors,
     file = "analysis/models/health_age.Rdata")

education_1 <-
  lm(education_std ~ gender * age_years, data = speech_scores)
education_2 <-
  lm(
    education_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
education_3 <-
  lm(
    education_std ~ gender * age_years + party   + education_minister_gov + gov_mp +
      education_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(education_1,
           clusters = speech_scores$person_id,
           se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(education_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(education_3, ~ person_id)))

save(
  education_1,
  education_2,
  education_3,
  my_clustered_errors,
  file = "analysis/models/education_age.Rdata"
)

social_1 <-
  lm(social_std ~ gender * age_years, data = speech_scores)
social_2 <-
  lm(
    social_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
social_3 <-
  lm(
    social_std ~ gender * age_years + party   + welfare_minister_gov + gov_mp +
      welfare_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(social_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <- sqrt(diag(vcovCL(social_2, ~ person_id)))
my_clustered_errors[[3]] <- sqrt(diag(vcovCL(social_3, ~ person_id)))

save(social_1,
     social_2,
     social_3,
     my_clustered_errors,
     file = "analysis/models/social_age.Rdata")

environment_1 <-
  lm(environment_std ~ gender * age_years, data = speech_scores)
environment_2 <-
  lm(
    environment_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
environment_3 <-
  lm(
    environment_std ~ gender * age_years + party   + environment_minister_gov + gov_mp +
      environment_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(environment_1,
           clusters = speech_scores$person_id,
           se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(environment_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(environment_3, ~ person_id)))

save(
  environment_1,
  environment_2,
  environment_3,
  my_clustered_errors,
  file = "analysis/models/environment_age.Rdata"
)

defence_1 <-
  lm(defence_std ~ gender * age_years, data = speech_scores)
defence_2 <-
  lm(
    defence_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
defence_3 <-
  lm(
    defence_std ~ gender * age_years + party   + defence_minister_gov + gov_mp +
      defence_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(defence_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(defence_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(defence_3, ~ person_id)))

save(defence_1,
     defence_2,
     defence_3,
     my_clustered_errors,
     file = "analysis/models/defence_age.Rdata")

economy_1 <-
  lm(economy_std ~ gender * age_years, data = speech_scores)
economy_2 <-
  lm(
    economy_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
economy_3 <-
  lm(
    economy_std ~ gender * age_years + party   + economy_minister_gov + gov_mp +
      economy_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(economy_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(economy_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(economy_3, ~ person_id)))

save(economy_1,
     economy_2,
     economy_3,
     my_clustered_errors,
     file = "analysis/models/economy_age.Rdata")

agriculture_1 <-
  lm(agriculture_std ~ gender * age_years, data = speech_scores)
agriculture_2 <-
  lm(
    agriculture_std ~ gender * age_years + party + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
agriculture_3 <-
  lm(
    agriculture_std ~ gender * age_years + party   + agriculture_minister_gov + gov_mp +
      agriculture_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(agriculture_1,
           clusters = speech_scores$person_id,
           se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(agriculture_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(agriculture_3, ~ person_id)))

save(
  agriculture_1,
  agriculture_2,
  agriculture_3,
  my_clustered_errors,
  file = "analysis/models/agriculture_age.Rdata"
)

trade_1 <-
  lm(trade_std ~ gender * age_years, data = speech_scores)
trade_2 <-
  lm(
    trade_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
trade_3 <-
  lm(
    trade_std ~ gender * age_years + party   + trade_minister_gov + gov_mp +
      trade_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(trade_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <- sqrt(diag(vcovCL(trade_2, ~ person_id)))
my_clustered_errors[[3]] <- sqrt(diag(vcovCL(trade_3, ~ person_id)))

save(trade_1,
     trade_2,
     trade_3,

     my_clustered_errors,
     file = "analysis/models/trade_age.Rdata")

crime_1 <-
  lm(crime_std ~ gender * age_years, data = speech_scores)
crime_2 <-
  lm(
    crime_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
crime_3 <-
  lm(
    crime_std ~ gender * age_years + party   + crime_minister_gov + gov_mp +
      crime_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(crime_1, clusters = speech_scores$person_id, se_type = "stata")
my_clustered_errors[[2]] <- sqrt(diag(vcovCL(crime_2, ~ person_id)))
my_clustered_errors[[3]] <- sqrt(diag(vcovCL(crime_3, ~ person_id)))

save(crime_1,
     crime_2,
     crime_3,
     my_clustered_errors,
     file = "analysis/models/crime_age.Rdata")

transport_1 <-
  lm(transport_std ~ gender * age_years, data = speech_scores)
transport_2 <-
  lm(
    transport_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
transport_3 <-
  lm(
    transport_std ~ gender * age_years + party   + transport_minister_gov + gov_mp +
      transport_minister_opp + marginality + occupation_class,
    data = speech_scores
  )

my_clustered_errors <-
  starprep(transport_1,
           clusters = speech_scores$person_id,
           se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(transport_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(transport_3, ~ person_id)))

save(
  transport_1,
  transport_2,
  transport_3,
  my_clustered_errors,
  file = "analysis/models/transport_age.Rdata"
)

children_1 <-
  lm(children_std ~ gender * age_years, data = speech_scores)
children_2 <-
  lm(
    children_std ~ gender * age_years + party   + attends_cabinet +
      attends_shadow_cabinet + is_gov_minister + is_opp_minister + gov_mp +
      is_committee_chair + marginality + occupation_class,
    data = speech_scores
  )
children_3 <-
  lm(
    children_std ~ gender * age_years + family_minister_opp + marginality + gov_mp
    + occupation_class,
    data = speech_scores
  )


my_clustered_errors <-
  starprep(children_1,
           clusters = speech_scores$person_id,
           se_type = "stata")
my_clustered_errors[[2]] <-
  sqrt(diag(vcovCL(children_2, ~ person_id)))
my_clustered_errors[[3]] <-
  sqrt(diag(vcovCL(children_3, ~ person_id)))

save(
  children_1,
  children_2,
  children_3,
  my_clustered_errors,
  file = "analysis/models/children_age.Rdata"
)
